import re

from dataset_analysis.utils import *

# Default location of the exported SQL log; kept so existing no-argument
# callers behave exactly as before.
_DEFAULT_SQL_CSV = '/Users/hwj/PycharmProjects/study/dataset_analysis/data_center_langchao/inspur_sqls.csv'


def get_sqls(file_path=_DEFAULT_SQL_CSV):
    """Return the whitespace-normalized SQL statements read from a CSV file.

    A row is kept only when its second column (index 1) is a string that
    contains ``'my'``. The SQL text is taken from the sixth column (index 5);
    every run of whitespace (spaces, tabs, newlines) is collapsed to a single
    space and leading/trailing whitespace is removed.

    Rows that pass the filter but whose SQL cell is not a string (for example
    an empty cell, which pandas reads as NaN) are reported on stdout and
    skipped.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read. Defaults to
            the path this script has always used.

    Returns:
        A list of normalized SQL strings, in file order.
    """
    file = pd.read_csv(file_path, encoding='utf-8')
    sqls = []

    # name=None yields plain tuples, so columns are addressed by position
    # exactly like the former ``iloc[i, n]`` lookups.
    for row in file.itertuples(index=False, name=None):
        marker = row[1]
        if not isinstance(marker, str) or 'my' not in marker:
            continue

        raw_sql = row[5]
        if not isinstance(raw_sql, str):
            # Previously surfaced through a bare ``except``; handled
            # explicitly so that unrelated errors are no longer swallowed.
            print(f'**********************************{raw_sql}')
            continue

        # split()/join already collapses every whitespace run to one space,
        # so no additional regex pass is required.
        sqls.append(' '.join(raw_sql.split()))
    return sqls


# Driver section: these statements run at module level, so they execute
# whenever this file is run or imported.
# NOTE(review): the get_*/save_* helpers are not defined in this file; they
# presumably come from the star import of dataset_analysis.utils - confirm.
sqls = get_sqls()

## Generate the skeleton statistics detail file
skeletons, skeleton_types = get_skeleton_models(sqls)
save_skeleton_models(skeleton_models=skeletons, file_name='langchao_skeleton_data.xlsx')

## Generate the skeleton type statistics file
save_skeleton_type(skeleton_types, file_name='langchao_skeleton_type_data.xlsx')

## Generate the keyword statistics file
keyword_models = get_keyword_models(sqls)
save_keyword_models(keyword_models=keyword_models, file_name='langchao_keyword_data.xlsx')
